Syllogism Validation with BERT

Python
Deep Learning
NLP

Given two premises, this model classifies whether they form a valid syllogism, reaching roughly 85% accuracy on a test set with a near 50/50 class split.

Author

Jake Gehri

Published

November 27, 2022

import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
import transformers
from transformers import DistilBertTokenizer
from transformers import DistilBertForSequenceClassification
from transformers import Trainer, TrainingArguments
from datasets import load_metric
import numpy as np
df = pd.read_csv('Avicenna_Train.csv', encoding='ISO-8859-1')
df.head()
Premise 1 Premise 2 Syllogistic relation Conclusion
0 unchecked imbalances in the society, will see ... correct these imbalances requires in-depth kno... no No conclusion
1 Chronic diseases are heart attacks and stroke,... In populations that eat a regular high-fiber d... yes In populations that eat a regular high-fiber d...
2 Formative assessment encourages children to en... An ideal learning environment uses formative a... yes An ideal learning environment encourages child...
3 Underrepresented female labor force in some pr... Job discrimination comes with underrepresented... yes Job discrimination comes with not being able t...
4 damaged mentality in an individual brings seri... Aggression harms the mentality of person. yes Aggression brings brings serious health proble...
df['label'] = df['Syllogistic relation'].eq('yes').mul(1)
df['text'] = (df['Premise 1'] + " : " + df['Premise 2'])
df['label'].value_counts()
1    2427
0    2373
Name: label, dtype: int64
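With 2427 positive and 2373 negative examples the classes are nearly balanced, so a majority-class baseline sits at about 50% accuracy. As a quick sanity check on the derived columns (using the text and label fields created above), one can print a single combined example:

# Row 0 has Syllogistic relation == "no", so its derived label should be 0.
print(df.loc[0, 'text'])
print(df.loc[0, 'label'])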
int(len(df) * 0.8)
3840
train_texts = df.iloc[:3840]['text'].values
train_labels = df.iloc[:3840]['label'].values

valid_texts = df.iloc[3840:]['text'].values
valid_labels = df.iloc[3840:]['label'].values
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True)
valid_encodings = tokenizer(list(valid_texts), truncation=True, padding=True)
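As an optional check that the premises were joined and padded as intended, the encodings can be inspected and decoded back to text; the DistilBERT tokenizer returns input_ids and attention_mask:

print(train_encodings.keys())                              # dict_keys(['input_ids', 'attention_mask'])
print(tokenizer.decode(train_encodings['input_ids'][0]))   # "[CLS] <premise 1> : <premise 2> [SEP] [PAD] ..."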
class SyllogismDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    
    def __len__(self):
        return len(self.labels)
train_dataset = SyllogismDataset(train_encodings, train_labels)
valid_dataset = SyllogismDataset(valid_encodings, valid_labels)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=16, shuffle=True)
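A minimal check (not needed for training) that __getitem__ returns what the Trainer expects, i.e. a dict of tensors with a labels entry:

item = train_dataset[0]
print(item.keys())               # dict_keys(['input_ids', 'attention_mask', 'labels'])
print(item['input_ids'].shape)   # padded sequence length chosen by the tokenizer
print(item['labels'])            # tensor(0) or tensor(1)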
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(DEVICE)  # the Trainer moves the model itself, but being explicit lets us poke at it manually
model.train()
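Before handing everything to the Trainer (which builds its own dataloaders and handles device placement), a single untrained forward pass is a cheap sanity check that the batches and the classification head fit together; this is only a sketch using the dataloader defined above:

batch = next(iter(train_dataloader))
batch = {k: v.to(DEVICE) for k, v in batch.items()}
with torch.no_grad():
    out = model(**batch)
print(out.loss, out.logits.shape)   # scalar cross-entropy loss and a (16, 2) logits tensor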

metrics = load_metric('accuracy')
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    
    predictions = np.argmax(logits, axis=-1)
    return metrics.compute(predictions=predictions, references=labels)
training_args = TrainingArguments(output_dir='./results', num_train_epochs=3, per_device_train_batch_size=16,
                                 per_device_eval_batch_size=16, logging_dir='./logs', logging_steps=72)
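These arguments only log the training loss, which is why the table below shows no validation numbers during training. If you want compute_metrics run on the validation set each epoch, TrainingArguments accepts an evaluation strategy; a possible variant (not what was run here):

training_args_with_eval = TrainingArguments(output_dir='./results', num_train_epochs=3,
                                            per_device_train_batch_size=16, per_device_eval_batch_size=16,
                                            evaluation_strategy='epoch',   # evaluate after every epoch
                                            logging_dir='./logs', logging_steps=72)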

trainer = Trainer(model=model, 
                  args=training_args, 
                  train_dataset=train_dataset, 
                  eval_dataset=valid_dataset,
                  compute_metrics=compute_metrics
                 )
trainer.train()
/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
  warnings.warn(
***** Running training *****
  Num examples = 3840
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 720
[720/720 01:36, Epoch 3/3]
Step Training Loss
72 0.658500
144 0.492300
216 0.413400
288 0.298300
360 0.253200
432 0.216700
504 0.178600
576 0.106900
648 0.106800
720 0.091800

Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)

TrainOutput(global_step=720, training_loss=0.281636557314131, metrics={'train_runtime': 97.56, 'train_samples_per_second': 118.081, 'train_steps_per_second': 7.38, 'total_flos': 289110097566720.0, 'train_loss': 0.281636557314131, 'epoch': 3.0})
trainer.evaluate()
***** Running Evaluation *****
  Num examples = 960
  Batch size = 16
[60/60 02:07]
{'eval_loss': 0.4387502670288086,
 'eval_accuracy': 0.88125,
 'eval_runtime': 2.2301,
 'eval_samples_per_second': 430.476,
 'eval_steps_per_second': 26.905,
 'epoch': 3.0}
df_test = pd.read_csv('Avicenna_Test.csv', encoding='ISO-8859-1')

df_test['label'] = df_test['Syllogistic relation'].eq('yes').mul(1)
df_test['text'] = (df_test['Premise 1'] + " : " + df_test['Premise 2'])

test_texts = df_test['text'].values
test_labels = df_test['label'].values

test_encodings = tokenizer(list(test_texts), truncation=True, padding=True)

test_dataset = SyllogismDataset(test_encodings, test_labels)

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=True)
trainer.evaluate(test_dataset)
***** Running Evaluation *****
  Num examples = 1200
  Batch size = 16
{'eval_loss': 0.5759531855583191,
 'eval_accuracy': 0.8525,
 'eval_runtime': 2.8515,
 'eval_samples_per_second': 420.837,
 'eval_steps_per_second': 26.302,
 'epoch': 3.0}
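Accuracy alone does not show whether errors skew toward false "valid" or false "invalid" calls. A small per-class breakdown of the test predictions, sketched with the objects already defined above:

test_preds = np.argmax(trainer.predict(test_dataset).predictions, axis=-1)
print(pd.crosstab(test_labels, test_preds, rownames=['true'], colnames=['pred']))   # 2x2 confusion table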
sample_text = ['Socrates is a man : all men are mortal']
sample_label = [1]
sample_encoded = tokenizer(sample_text, truncation=True, padding=True)
sample_dataset = SyllogismDataset(sample_encoded, sample_label)
sample_dataset
<__main__.SyllogismDataset at 0x7f63a4fccd60>
trainer.predict(sample_dataset).label_ids
***** Running Prediction *****
  Num examples = 1
  Batch size = 16
array([1])
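One caveat: PredictionOutput.label_ids simply echoes the labels stored in the dataset, so the arrays printed here and in the next example reflect the supplied labels rather than the model's decision. To read the model's own prediction, take the argmax over .predictions; a minimal sketch:

pred = trainer.predict(sample_dataset)
probs = torch.softmax(torch.tensor(pred.predictions), dim=-1)
print(probs.argmax(dim=-1).item(), probs.max().item())   # predicted class and its softmax confidence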
sample_text_2 = ['If the streets are wet, it has rained recently : The streets are wet.']
sample_label_2 = [0]

sample_encoded_2 = tokenizer(sample_text_2, truncation=True, padding=True)

sample_dataset_2 = SyllogismDataset(sample_encoded_2, sample_label_2)

trainer.predict(sample_dataset_2).label_ids
***** Running Prediction *****
  Num examples = 1
  Batch size = 16
array([0])